import os
import warnings

import numpy as np
import matplotlib.pyplot as plt
import cv2

warnings.filterwarnings('ignore')

from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LeakyReLU, Reshape, Flatten, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Conv2DTranspose
# The TF1-compatible BatchNormalization is imported deliberately: the v2 layer
# switches to inference mode when `trainable` is False, which can interfere
# with the discriminator-freezing trick used in the combined model below.
from tensorflow.compat.v1.keras.layers import BatchNormalization
def list_images(basePath, contains=None):
    # Return the set of valid image files under basePath.
    return list_files(basePath, validExts=(".jpg", ".jpeg", ".png", ".bmp"), contains=contains)

def list_files(basePath, validExts=(".jpg", ".jpeg", ".png", ".bmp"), contains=None):
    # Loop over the directory structure.
    for (rootDir, dirNames, filenames) in os.walk(basePath):
        # Loop over the filenames in the current directory.
        for filename in filenames:
            # If a filter string was supplied and the filename does not
            # contain it, ignore the file.
            if contains is not None and filename.find(contains) == -1:
                continue
            # Determine the file extension of the current file.
            ext = filename[filename.rfind("."):].lower()
            # Check whether the file is an image that should be processed.
            if ext.endswith(validExts):
                # Construct the path to the image and yield it.
                # (No shell-escaping of spaces: escaped paths would not be
                # readable by cv2.imread.)
                yield os.path.join(rootDir, filename)
def load_images(directory='', size=(64, 64)):
    images = []
    for path in list_images(directory):
        # Skip macOS metadata entries (e.g. __MACOSX folders) shipped in some archives.
        if 'OSX' in path:
            continue
        image = cv2.imread(path)  # Read the image with OpenCV (BGR channel order).
        if image is None:  # Skip files OpenCV cannot decode.
            continue
        image = cv2.resize(image, size)  # Resize, in case some images differ in size.
        images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # Convert BGR -> RGB.
    return images
images = load_images('../input/data')

# Preview a 5x5 grid of dataset images.
_, ax = plt.subplots(5, 5, figsize=(8, 8))
for i in range(5):
    for j in range(5):
        ax[i, j].imshow(images[5*i + j])
        ax[i, j].axis('off')
The objective of a GAN is to train a data generator that imitates a given dataset. Training is a zero-sum game between two neural networks: the generator, which produces data, and a discriminator, which is trained to distinguish original data from the fakes created by the generator.
At each step, the discriminator is trained on a batch containing both real and fake images; the generator is then trained to produce a batch of images that the discriminator classifies as real.
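Formally, this game corresponds to the minimax objective of the original GAN paper (stated here for reference; $x$ denotes real data and $z$ the generator's input noise):

$$\min_G \max_D \; \mathbb{E}_{x \sim p_\text{data}}\big[\log D(x)\big] + \mathbb{E}_{z \sim p_z}\big[\log\big(1 - D(G(z))\big)\big]$$

In practice the generator is trained with the non-saturating variant, maximizing $\log D(G(z))$: this is exactly what the code below does by training the combined model with binary cross-entropy and all-ones labels on generated images.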
To build an effective GAN on images, we must use convolutional layers in both the discriminator and the generator. In a Deep Convolutional GAN (DCGAN), the networks have the following structure:
class GAN():
    def __init__(self):
        self.img_shape = (64, 64, 3)
        self.noise_size = 100
        optimizer = Adam(0.0002, 0.5)

        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        self.generator = self.build_generator()
        self.generator.compile(loss='binary_crossentropy', optimizer=optimizer)

        # The combined model stacks the generator and the discriminator.
        # The discriminator is frozen before compiling, so training the
        # combined model only updates the generator's weights.
        self.combined = Sequential()
        self.combined.add(self.generator)
        self.combined.add(self.discriminator)
        self.discriminator.trainable = False
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)
        self.combined.summary()
    # The generator; the large kernels in the transposed convolutions let the
    # network create complex structures.
    def build_generator(self):
        epsilon = 0.00001  # Small float added to the variance to avoid dividing by zero in the BatchNorm layers.
        noise_shape = (self.noise_size,)

        model = Sequential()
        model.add(Dense(4*4*512, activation='linear', input_shape=noise_shape))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Reshape((4, 4, 512)))
        model.add(Conv2DTranspose(512, kernel_size=[4, 4], strides=[2, 2], padding="same",
                                  kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02)))
        model.add(BatchNormalization(momentum=0.9, epsilon=epsilon))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2DTranspose(256, kernel_size=[4, 4], strides=[2, 2], padding="same",
                                  kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02)))
        model.add(BatchNormalization(momentum=0.9, epsilon=epsilon))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2DTranspose(128, kernel_size=[4, 4], strides=[2, 2], padding="same",
                                  kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02)))
        model.add(BatchNormalization(momentum=0.9, epsilon=epsilon))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2DTranspose(64, kernel_size=[4, 4], strides=[2, 2], padding="same",
                                  kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02)))
        model.add(BatchNormalization(momentum=0.9, epsilon=epsilon))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Conv2DTranspose(3, kernel_size=[4, 4], strides=[1, 1], padding="same",
                                  kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02)))
        # tanh is the standard output activation for a GAN generator; it matches
        # the [-1, 1] scaling applied to the training images.
        model.add(Activation("tanh"))
        model.summary()

        noise = Input(shape=noise_shape)
        img = model(noise)
        return Model(noise, img)
    def build_discriminator(self):
        model = Sequential()
        model.add(Conv2D(128, (3, 3), padding='same', input_shape=self.img_shape))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization())
        model.add(Conv2D(128, (3, 3), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(3, 3)))
        model.add(Dropout(0.2))
        model.add(Conv2D(128, (3, 3), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization())
        model.add(Conv2D(128, (3, 3), padding='same'))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(3, 3)))
        model.add(Dropout(0.3))
        model.add(Flatten())
        model.add(Dense(128))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(128))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))
        model.summary()

        img = Input(shape=self.img_shape)
        validity = model(img)
        return Model(img, validity)
    def train(self, epochs, batch_size=128, metrics_update=50, save_images=100, save_model=2000):
        # Note: each "epoch" here is one training step on a single batch,
        # not a full pass over the dataset.
        X_train = np.array(images)  # Uses the globally loaded `images` list.
        X_train = (X_train.astype(np.float32) - 127.5) / 127.5  # Rescale to [-1, 1] to match the tanh output.
        half_batch = int(batch_size / 2)

        mean_d_loss = [0, 0]
        mean_g_loss = 0
        for epoch in range(epochs):
            # Sample half a batch of real images and generate half a batch of fakes.
            idx = np.random.randint(0, X_train.shape[0], half_batch)
            imgs = X_train[idx]
            noise = np.random.normal(0, 1, (half_batch, self.noise_size))
            gen_imgs = self.generator.predict(noise)

            # Training the discriminator.
            # Its loss is the mean of the losses on authentic and fake images.
            d_loss = 0.5 * np.add(self.discriminator.train_on_batch(imgs, np.ones((half_batch, 1))),
                                  self.discriminator.train_on_batch(gen_imgs, np.zeros((half_batch, 1))))

            # Training the generator (twice per discriminator update): the
            # generated images are labeled as real, so the combined loss
            # rewards fooling the discriminator.
            for _ in range(2):
                noise = np.random.normal(0, 1, (batch_size, self.noise_size))
                valid_y = np.array([1] * batch_size)
                g_loss = self.combined.train_on_batch(noise, valid_y)

            mean_d_loss[0] += d_loss[0]
            mean_d_loss[1] += d_loss[1]
            mean_g_loss += g_loss

            # Print the losses and accuracy every metrics_update batches, mainly
            # to check that the discriminator's accuracy is not stuck around 50%
            # or 100% (meaning it performs either too poorly or too well).
            if epoch % metrics_update == 0:
                print("%d [Discriminator loss: %f, acc.: %.2f%%] [Generator loss: %f]"
                      % (epoch, mean_d_loss[0]/metrics_update, 100*mean_d_loss[1]/metrics_update, mean_g_loss/metrics_update))
                mean_d_loss = [0, 0]
                mean_g_loss = 0

            # Saving 25 generated images.
            if epoch % save_images == 0:
                self.save_images(epoch)

            # Save the architecture of the models, the weights, and the state
            # of the optimizer, so training can be restarted where it stopped.
            if epoch % save_model == 0:
                self.generator.save("generator_%d" % epoch)
                self.discriminator.save("discriminator_%d" % epoch)
    # Save 25 generated images to get a representative sample of the spectrum
    # of images created by the generator.
    def save_images(self, epoch):
        noise = np.random.normal(0, 1, (25, self.noise_size))
        gen_imgs = self.generator.predict(noise)
        gen_imgs = 0.5 * gen_imgs + 0.5  # Rescale from [-1, 1] to [0, 1].

        fig, axs = plt.subplots(5, 5, figsize=(8, 8))
        for i in range(5):
            for j in range(5):
                axs[i, j].imshow(gen_imgs[5*i + j])
                axs[i, j].axis('off')
        plt.show()
        fig.savefig("animeGenerated/Faces_%d.png" % epoch)
        plt.close()
# This folder will contain the images generated during training.
!mkdir animeGenerated
gan = GAN()
gan.train(epochs=15001, batch_size=256, metrics_update=200, save_images=1000, save_model=15000)
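Because each checkpoint stores the architecture, weights, and optimizer state of the two networks, training can later be resumed from the saved files. A minimal sketch, assuming the checkpoints written at step 15000 above exist (note that the combined model's optimizer state is not saved, so resumption is approximate, and the step counter restarts at 0):

from tensorflow.keras.models import load_model

gan = GAN()
gan.generator = load_model("generator_15000")
gan.discriminator = load_model("discriminator_15000")
# The combined model must be rebuilt so it points at the restored networks.
gan.combined = Sequential([gan.generator, gan.discriminator])
gan.discriminator.trainable = False
gan.combined.compile(loss='binary_crossentropy', optimizer=Adam(0.0002, 0.5))
gan.train(epochs=5001, batch_size=256, metrics_update=200, save_images=1000, save_model=5000)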
(TensorFlow startup logs omitted; the session created one GPU device: Tesla P100-PCIE-16GB, 15403 MB memory.)
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 64, 64, 128) 3584 _________________________________________________________________ leaky_re_lu (LeakyReLU) (None, 64, 64, 128) 0 _________________________________________________________________ batch_normalization (BatchNo (None, 64, 64, 128) 512 _________________________________________________________________ conv2d_1 (Conv2D) (None, 64, 64, 128) 147584 _________________________________________________________________ leaky_re_lu_1 (LeakyReLU) (None, 64, 64, 128) 0 _________________________________________________________________ batch_normalization_1 (Batch (None, 64, 64, 128) 512 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 21, 21, 128) 0 _________________________________________________________________ dropout (Dropout) (None, 21, 21, 128) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 21, 21, 128) 147584 _________________________________________________________________ leaky_re_lu_2 (LeakyReLU) (None, 21, 21, 128) 0 _________________________________________________________________ batch_normalization_2 (Batch (None, 21, 21, 128) 512 _________________________________________________________________ conv2d_3 (Conv2D) (None, 21, 21, 128) 147584 _________________________________________________________________ leaky_re_lu_3 (LeakyReLU) (None, 21, 21, 128) 0 _________________________________________________________________ batch_normalization_3 (Batch (None, 21, 21, 128) 512 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 7, 7, 128) 0 _________________________________________________________________ dropout_1 (Dropout) (None, 7, 7, 128) 0 _________________________________________________________________ flatten (Flatten) (None, 6272) 0 _________________________________________________________________ dense (Dense) (None, 128) 802944 _________________________________________________________________ leaky_re_lu_4 (LeakyReLU) (None, 128) 0 _________________________________________________________________ dense_1 (Dense) (None, 128) 16512 _________________________________________________________________ leaky_re_lu_5 (LeakyReLU) (None, 128) 0 _________________________________________________________________ dense_2 (Dense) (None, 1) 129 ================================================================= Total params: 1,267,969 Trainable params: 1,266,945 Non-trainable params: 1,024 _________________________________________________________________ Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= dense_3 (Dense) (None, 8192) 827392 _________________________________________________________________ leaky_re_lu_6 (LeakyReLU) (None, 8192) 0 _________________________________________________________________ reshape (Reshape) (None, 4, 4, 512) 0 _________________________________________________________________ conv2d_transpose (Conv2DTran (None, 8, 8, 512) 4194816 _________________________________________________________________ batch_normalization_4 (Batch (None, 8, 8, 512) 2048 _________________________________________________________________ leaky_re_lu_7 (LeakyReLU) (None, 8, 8, 512) 0 
_________________________________________________________________ conv2d_transpose_1 (Conv2DTr (None, 16, 16, 256) 2097408 _________________________________________________________________ batch_normalization_5 (Batch (None, 16, 16, 256) 1024 _________________________________________________________________ leaky_re_lu_8 (LeakyReLU) (None, 16, 16, 256) 0 _________________________________________________________________ conv2d_transpose_2 (Conv2DTr (None, 32, 32, 128) 524416 _________________________________________________________________ batch_normalization_6 (Batch (None, 32, 32, 128) 512 _________________________________________________________________ leaky_re_lu_9 (LeakyReLU) (None, 32, 32, 128) 0 _________________________________________________________________ conv2d_transpose_3 (Conv2DTr (None, 64, 64, 64) 131136 _________________________________________________________________ batch_normalization_7 (Batch (None, 64, 64, 64) 256 _________________________________________________________________ leaky_re_lu_10 (LeakyReLU) (None, 64, 64, 64) 0 _________________________________________________________________ conv2d_transpose_4 (Conv2DTr (None, 64, 64, 3) 3075 _________________________________________________________________ activation (Activation) (None, 64, 64, 3) 0 ================================================================= Total params: 7,782,083 Trainable params: 7,780,163 Non-trainable params: 1,920 _________________________________________________________________ Model: "sequential_2" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= model_1 (Functional) (None, 64, 64, 3) 7782083 _________________________________________________________________ model (Functional) (None, 1) 1267969 ================================================================= Total params: 9,050,052 Trainable params: 7,780,163 Non-trainable params: 1,269,889 _________________________________________________________________
0 [Discriminator loss: 0.025244, acc.: 0.05%] [Generator loss: 0.000382]
200 [Discriminator loss: 0.649526, acc.: 64.80%] [Generator loss: 1.101452]
400 [Discriminator loss: 0.702651, acc.: 54.08%] [Generator loss: 0.932912]
600 [Discriminator loss: 0.708935, acc.: 50.52%] [Generator loss: 0.878296]
800 [Discriminator loss: 0.688447, acc.: 54.98%] [Generator loss: 0.919699]
1000 [Discriminator loss: 0.700884, acc.: 52.20%] [Generator loss: 0.921626]
1200 [Discriminator loss: 0.696865, acc.: 51.82%] [Generator loss: 0.903415]
1400 [Discriminator loss: 0.698036, acc.: 51.79%] [Generator loss: 0.899413]
1600 [Discriminator loss: 0.699364, acc.: 51.56%] [Generator loss: 0.884401]
1800 [Discriminator loss: 0.697126, acc.: 51.76%] [Generator loss: 0.881516]
2000 [Discriminator loss: 0.698240, acc.: 51.41%] [Generator loss: 0.864546]
2200 [Discriminator loss: 0.693557, acc.: 52.88%] [Generator loss: 0.890242]
2400 [Discriminator loss: 0.689268, acc.: 53.08%] [Generator loss: 0.893482]
2600 [Discriminator loss: 0.685572, acc.: 54.15%] [Generator loss: 0.906857]
2800 [Discriminator loss: 0.682025, acc.: 54.97%] [Generator loss: 0.926007]
3000 [Discriminator loss: 0.677429, acc.: 55.83%] [Generator loss: 0.939565]
3200 [Discriminator loss: 0.668041, acc.: 57.88%] [Generator loss: 0.982611]
3400 [Discriminator loss: 0.668865, acc.: 57.98%] [Generator loss: 0.987889]
3600 [Discriminator loss: 0.661320, acc.: 58.67%] [Generator loss: 1.010330]
3800 [Discriminator loss: 0.640698, acc.: 61.79%] [Generator loss: 1.094321]
4000 [Discriminator loss: 0.639253, acc.: 62.20%] [Generator loss: 1.115002]
4200 [Discriminator loss: 0.634386, acc.: 62.95%] [Generator loss: 1.148245]
4400 [Discriminator loss: 0.610432, acc.: 65.20%] [Generator loss: 1.238606]
4600 [Discriminator loss: 0.600058, acc.: 66.70%] [Generator loss: 1.331045]
4800 [Discriminator loss: 0.590097, acc.: 67.51%] [Generator loss: 1.361022]
5000 [Discriminator loss: 0.575873, acc.: 69.03%] [Generator loss: 1.430956]
5200 [Discriminator loss: 0.570991, acc.: 69.56%] [Generator loss: 1.486497]
5400 [Discriminator loss: 0.558747, acc.: 70.57%] [Generator loss: 1.530289]
5600 [Discriminator loss: 0.540543, acc.: 71.80%] [Generator loss: 1.620870]
5800 [Discriminator loss: 0.532802, acc.: 72.88%] [Generator loss: 1.676270]
6000 [Discriminator loss: 0.516909, acc.: 73.98%] [Generator loss: 1.788281]
6200 [Discriminator loss: 0.507527, acc.: 74.62%] [Generator loss: 1.856439]
6400 [Discriminator loss: 0.496545, acc.: 75.29%] [Generator loss: 1.914902]
6600 [Discriminator loss: 0.483419, acc.: 76.37%] [Generator loss: 2.003542]
6800 [Discriminator loss: 0.483122, acc.: 76.39%] [Generator loss: 1.985013]
7000 [Discriminator loss: 0.477613, acc.: 76.78%] [Generator loss: 2.085765]
7200 [Discriminator loss: 0.462546, acc.: 77.61%] [Generator loss: 2.157185]
7400 [Discriminator loss: 0.451467, acc.: 78.47%] [Generator loss: 2.245237]
7600 [Discriminator loss: 0.439294, acc.: 79.10%] [Generator loss: 2.328188]
7800 [Discriminator loss: 0.434284, acc.: 79.61%] [Generator loss: 2.340230]
8000 [Discriminator loss: 0.424915, acc.: 80.07%] [Generator loss: 2.476049]
8200 [Discriminator loss: 0.417314, acc.: 80.76%] [Generator loss: 2.509731]
8400 [Discriminator loss: 0.407895, acc.: 81.01%] [Generator loss: 2.564007]
8600 [Discriminator loss: 0.413795, acc.: 80.86%] [Generator loss: 2.563290]
8800 [Discriminator loss: 0.405358, acc.: 81.36%] [Generator loss: 2.623583]
9000 [Discriminator loss: 0.392623, acc.: 82.01%] [Generator loss: 2.717418]
9200 [Discriminator loss: 0.382624, acc.: 82.52%] [Generator loss: 2.791587]
9400 [Discriminator loss: 0.379241, acc.: 82.75%] [Generator loss: 2.820441]
9600 [Discriminator loss: 0.376366, acc.: 82.98%] [Generator loss: 2.892852]
9800 [Discriminator loss: 0.368259, acc.: 83.36%] [Generator loss: 2.939852]
10000 [Discriminator loss: 0.363847, acc.: 83.71%] [Generator loss: 3.010299]
10200 [Discriminator loss: 0.353042, acc.: 84.19%] [Generator loss: 3.059244]
10400 [Discriminator loss: 0.347265, acc.: 84.62%] [Generator loss: 3.148979]
10600 [Discriminator loss: 0.339466, acc.: 85.16%] [Generator loss: 3.154441]
10800 [Discriminator loss: 0.350772, acc.: 84.53%] [Generator loss: 3.177724]
11000 [Discriminator loss: 0.341773, acc.: 85.07%] [Generator loss: 3.158017]
11200 [Discriminator loss: 0.327791, acc.: 85.54%] [Generator loss: 3.297016]
11400 [Discriminator loss: 0.327153, acc.: 85.72%] [Generator loss: 3.364879]
11600 [Discriminator loss: 0.329424, acc.: 85.68%] [Generator loss: 3.410798]
11800 [Discriminator loss: 0.326960, acc.: 85.56%] [Generator loss: 3.391983]
12000 [Discriminator loss: 0.317053, acc.: 86.16%] [Generator loss: 3.396146]
12200 [Discriminator loss: 0.314937, acc.: 86.19%] [Generator loss: 3.532771]
12400 [Discriminator loss: 0.318966, acc.: 86.00%] [Generator loss: 3.492365]
12600 [Discriminator loss: 0.311542, acc.: 86.58%] [Generator loss: 3.486932]
12800 [Discriminator loss: 0.308075, acc.: 86.72%] [Generator loss: 3.628883]
13000 [Discriminator loss: 0.298607, acc.: 87.14%] [Generator loss: 3.722821]
13200 [Discriminator loss: 0.302987, acc.: 87.00%] [Generator loss: 3.626120]
13400 [Discriminator loss: 0.293449, acc.: 87.41%] [Generator loss: 3.686022]
13600 [Discriminator loss: 0.302977, acc.: 86.99%] [Generator loss: 3.772613]
13800 [Discriminator loss: 0.291718, acc.: 87.43%] [Generator loss: 3.749322]
14000 [Discriminator loss: 0.292686, acc.: 87.38%] [Generator loss: 3.833549]
14200 [Discriminator loss: 0.292850, acc.: 87.60%] [Generator loss: 3.871924]
14400 [Discriminator loss: 0.289580, acc.: 87.80%] [Generator loss: 3.830401]
14600 [Discriminator loss: 0.276900, acc.: 88.24%] [Generator loss: 3.895985]
14800 [Discriminator loss: 0.282965, acc.: 88.09%] [Generator loss: 3.895624]
15000 [Discriminator loss: 0.274472, acc.: 88.36%] [Generator loss: 4.010426]
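After training, new faces can be sampled directly from the trained generator. A minimal sketch, mirroring the rescaling used in save_images:

noise = np.random.normal(0, 1, (25, gan.noise_size))
faces = gan.generator.predict(noise)
faces = 0.5 * faces + 0.5  # Rescale from [-1, 1] to [0, 1] for display.
fig, axs = plt.subplots(5, 5, figsize=(8, 8))
for i in range(5):
    for j in range(5):
        axs[i, j].imshow(faces[5*i + j])
        axs[i, j].axis('off')
plt.show()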